Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--  fs/btrfs/disk-io.c  |  400
1 file changed, 233 insertions, 167 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3e18175248e0..e83be2e4602c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -26,8 +26,8 @@ | |||
26 | #include <linux/workqueue.h> | 26 | #include <linux/workqueue.h> |
27 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
28 | #include <linux/freezer.h> | 28 | #include <linux/freezer.h> |
29 | #include <linux/crc32c.h> | ||
29 | #include "compat.h" | 30 | #include "compat.h" |
30 | #include "crc32c.h" | ||
31 | #include "ctree.h" | 31 | #include "ctree.h" |
32 | #include "disk-io.h" | 32 | #include "disk-io.h" |
33 | #include "transaction.h" | 33 | #include "transaction.h" |
@@ -36,12 +36,14 @@ | |||
36 | #include "print-tree.h" | 36 | #include "print-tree.h" |
37 | #include "async-thread.h" | 37 | #include "async-thread.h" |
38 | #include "locking.h" | 38 | #include "locking.h" |
39 | #include "ref-cache.h" | ||
40 | #include "tree-log.h" | 39 | #include "tree-log.h" |
40 | #include "free-space-cache.h" | ||
41 | 41 | ||
42 | static struct extent_io_ops btree_extent_io_ops; | 42 | static struct extent_io_ops btree_extent_io_ops; |
43 | static void end_workqueue_fn(struct btrfs_work *work); | 43 | static void end_workqueue_fn(struct btrfs_work *work); |
44 | 44 | ||
45 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); | ||
46 | |||
45 | /* | 47 | /* |
46 | * end_io_wq structs are used to do processing in task context when an IO is | 48 | * end_io_wq structs are used to do processing in task context when an IO is |
47 | * complete. This is used during reads to verify checksums, and it is used | 49 | * complete. This is used during reads to verify checksums, and it is used |
@@ -171,7 +173,7 @@ out: | |||
171 | 173 | ||
172 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) | 174 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) |
173 | { | 175 | { |
174 | return btrfs_crc32c(seed, data, len); | 176 | return crc32c(seed, data, len); |
175 | } | 177 | } |
176 | 178 | ||
177 | void btrfs_csum_final(u32 crc, char *result) | 179 | void btrfs_csum_final(u32 crc, char *result) |
@@ -231,10 +233,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
231 | memcpy(&found, result, csum_size); | 233 | memcpy(&found, result, csum_size); |
232 | 234 | ||
233 | read_extent_buffer(buf, &val, 0, csum_size); | 235 | read_extent_buffer(buf, &val, 0, csum_size); |
234 | printk(KERN_INFO "btrfs: %s checksum verify failed " | 236 | if (printk_ratelimit()) { |
235 | "on %llu wanted %X found %X level %d\n", | 237 | printk(KERN_INFO "btrfs: %s checksum verify " |
236 | root->fs_info->sb->s_id, | 238 | "failed on %llu wanted %X found %X " |
237 | buf->start, val, found, btrfs_header_level(buf)); | 239 | "level %d\n", |
240 | root->fs_info->sb->s_id, | ||
241 | (unsigned long long)buf->start, val, found, | ||
242 | btrfs_header_level(buf)); | ||
243 | } | ||
238 | if (result != (char *)&inline_result) | 244 | if (result != (char *)&inline_result) |
239 | kfree(result); | 245 | kfree(result); |
240 | return 1; | 246 | return 1; |
@@ -267,10 +273,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
267 | ret = 0; | 273 | ret = 0; |
268 | goto out; | 274 | goto out; |
269 | } | 275 | } |
270 | printk("parent transid verify failed on %llu wanted %llu found %llu\n", | 276 | if (printk_ratelimit()) { |
271 | (unsigned long long)eb->start, | 277 | printk("parent transid verify failed on %llu wanted %llu " |
272 | (unsigned long long)parent_transid, | 278 | "found %llu\n", |
273 | (unsigned long long)btrfs_header_generation(eb)); | 279 | (unsigned long long)eb->start, |
280 | (unsigned long long)parent_transid, | ||
281 | (unsigned long long)btrfs_header_generation(eb)); | ||
282 | } | ||
274 | ret = 1; | 283 | ret = 1; |
275 | clear_extent_buffer_uptodate(io_tree, eb); | 284 | clear_extent_buffer_uptodate(io_tree, eb); |
276 | out: | 285 | out: |
@@ -414,9 +423,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
414 | 423 | ||
415 | found_start = btrfs_header_bytenr(eb); | 424 | found_start = btrfs_header_bytenr(eb); |
416 | if (found_start != start) { | 425 | if (found_start != start) { |
417 | printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", | 426 | if (printk_ratelimit()) { |
418 | (unsigned long long)found_start, | 427 | printk(KERN_INFO "btrfs bad tree block start " |
419 | (unsigned long long)eb->start); | 428 | "%llu %llu\n", |
429 | (unsigned long long)found_start, | ||
430 | (unsigned long long)eb->start); | ||
431 | } | ||
420 | ret = -EIO; | 432 | ret = -EIO; |
421 | goto err; | 433 | goto err; |
422 | } | 434 | } |
@@ -428,8 +440,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
428 | goto err; | 440 | goto err; |
429 | } | 441 | } |
430 | if (check_tree_block_fsid(root, eb)) { | 442 | if (check_tree_block_fsid(root, eb)) { |
431 | printk(KERN_INFO "btrfs bad fsid on block %llu\n", | 443 | if (printk_ratelimit()) { |
432 | (unsigned long long)eb->start); | 444 | printk(KERN_INFO "btrfs bad fsid on block %llu\n", |
445 | (unsigned long long)eb->start); | ||
446 | } | ||
433 | ret = -EIO; | 447 | ret = -EIO; |
434 | goto err; | 448 | goto err; |
435 | } | 449 | } |
@@ -578,19 +592,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
578 | async->bio_flags = bio_flags; | 592 | async->bio_flags = bio_flags; |
579 | 593 | ||
580 | atomic_inc(&fs_info->nr_async_submits); | 594 | atomic_inc(&fs_info->nr_async_submits); |
595 | |||
596 | if (rw & (1 << BIO_RW_SYNCIO)) | ||
597 | btrfs_set_work_high_prio(&async->work); | ||
598 | |||
581 | btrfs_queue_worker(&fs_info->workers, &async->work); | 599 | btrfs_queue_worker(&fs_info->workers, &async->work); |
582 | #if 0 | ||
583 | int limit = btrfs_async_submit_limit(fs_info); | ||
584 | if (atomic_read(&fs_info->nr_async_submits) > limit) { | ||
585 | wait_event_timeout(fs_info->async_submit_wait, | ||
586 | (atomic_read(&fs_info->nr_async_submits) < limit), | ||
587 | HZ/10); | ||
588 | 600 | ||
589 | wait_event_timeout(fs_info->async_submit_wait, | ||
590 | (atomic_read(&fs_info->nr_async_bios) < limit), | ||
591 | HZ/10); | ||
592 | } | ||
593 | #endif | ||
594 | while (atomic_read(&fs_info->async_submit_draining) && | 601 | while (atomic_read(&fs_info->async_submit_draining) && |
595 | atomic_read(&fs_info->nr_async_submits)) { | 602 | atomic_read(&fs_info->nr_async_submits)) { |
596 | wait_event(fs_info->async_submit_wait, | 603 | wait_event(fs_info->async_submit_wait, |
@@ -655,6 +662,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
655 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 662 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
656 | mirror_num, 0); | 663 | mirror_num, 0); |
657 | } | 664 | } |
665 | |||
658 | /* | 666 | /* |
659 | * kthread helpers are used to submit writes so that checksumming | 667 | * kthread helpers are used to submit writes so that checksumming |
660 | * can happen in parallel across all CPUs | 668 | * can happen in parallel across all CPUs |
@@ -668,14 +676,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
668 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 676 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
669 | { | 677 | { |
670 | struct extent_io_tree *tree; | 678 | struct extent_io_tree *tree; |
679 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
680 | struct extent_buffer *eb; | ||
681 | int was_dirty; | ||
682 | |||
671 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 683 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
684 | if (!(current->flags & PF_MEMALLOC)) { | ||
685 | return extent_write_full_page(tree, page, | ||
686 | btree_get_extent, wbc); | ||
687 | } | ||
672 | 688 | ||
673 | if (current->flags & PF_MEMALLOC) { | 689 | redirty_page_for_writepage(wbc, page); |
674 | redirty_page_for_writepage(wbc, page); | 690 | eb = btrfs_find_tree_block(root, page_offset(page), |
675 | unlock_page(page); | 691 | PAGE_CACHE_SIZE); |
676 | return 0; | 692 | WARN_ON(!eb); |
693 | |||
694 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
695 | if (!was_dirty) { | ||
696 | spin_lock(&root->fs_info->delalloc_lock); | ||
697 | root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; | ||
698 | spin_unlock(&root->fs_info->delalloc_lock); | ||
677 | } | 699 | } |
678 | return extent_write_full_page(tree, page, btree_get_extent, wbc); | 700 | free_extent_buffer(eb); |
701 | |||
702 | unlock_page(page); | ||
703 | return 0; | ||
679 | } | 704 | } |
680 | 705 | ||
681 | static int btree_writepages(struct address_space *mapping, | 706 | static int btree_writepages(struct address_space *mapping, |
@@ -684,15 +709,15 @@ static int btree_writepages(struct address_space *mapping, | |||
684 | struct extent_io_tree *tree; | 709 | struct extent_io_tree *tree; |
685 | tree = &BTRFS_I(mapping->host)->io_tree; | 710 | tree = &BTRFS_I(mapping->host)->io_tree; |
686 | if (wbc->sync_mode == WB_SYNC_NONE) { | 711 | if (wbc->sync_mode == WB_SYNC_NONE) { |
712 | struct btrfs_root *root = BTRFS_I(mapping->host)->root; | ||
687 | u64 num_dirty; | 713 | u64 num_dirty; |
688 | u64 start = 0; | ||
689 | unsigned long thresh = 32 * 1024 * 1024; | 714 | unsigned long thresh = 32 * 1024 * 1024; |
690 | 715 | ||
691 | if (wbc->for_kupdate) | 716 | if (wbc->for_kupdate) |
692 | return 0; | 717 | return 0; |
693 | 718 | ||
694 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 719 | /* this is a bit racy, but that's ok */ |
695 | thresh, EXTENT_DIRTY); | 720 | num_dirty = root->fs_info->dirty_metadata_bytes; |
696 | if (num_dirty < thresh) | 721 | if (num_dirty < thresh) |
697 | return 0; | 722 | return 0; |
698 | } | 723 | } |
@@ -747,27 +772,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
747 | } | 772 | } |
748 | } | 773 | } |
749 | 774 | ||
750 | #if 0 | ||
751 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | ||
752 | { | ||
753 | struct buffer_head *bh; | ||
754 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
755 | struct buffer_head *head; | ||
756 | if (!page_has_buffers(page)) { | ||
757 | create_empty_buffers(page, root->fs_info->sb->s_blocksize, | ||
758 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | ||
759 | } | ||
760 | head = page_buffers(page); | ||
761 | bh = head; | ||
762 | do { | ||
763 | if (buffer_dirty(bh)) | ||
764 | csum_tree_block(root, bh, 0); | ||
765 | bh = bh->b_this_page; | ||
766 | } while (bh != head); | ||
767 | return block_write_full_page(page, btree_get_block, wbc); | ||
768 | } | ||
769 | #endif | ||
770 | |||
771 | static struct address_space_operations btree_aops = { | 775 | static struct address_space_operations btree_aops = { |
772 | .readpage = btree_readpage, | 776 | .readpage = btree_readpage, |
773 | .writepage = btree_writepage, | 777 | .writepage = btree_writepage, |
@@ -845,8 +849,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
845 | 849 | ||
846 | if (ret == 0) | 850 | if (ret == 0) |
847 | set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); | 851 | set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); |
848 | else | ||
849 | WARN_ON(1); | ||
850 | return buf; | 852 | return buf; |
851 | 853 | ||
852 | } | 854 | } |
@@ -859,9 +861,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
859 | root->fs_info->running_transaction->transid) { | 861 | root->fs_info->running_transaction->transid) { |
860 | btrfs_assert_tree_locked(buf); | 862 | btrfs_assert_tree_locked(buf); |
861 | 863 | ||
862 | /* ugh, clear_extent_buffer_dirty can be expensive */ | 864 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { |
863 | btrfs_set_lock_blocking(buf); | 865 | spin_lock(&root->fs_info->delalloc_lock); |
866 | if (root->fs_info->dirty_metadata_bytes >= buf->len) | ||
867 | root->fs_info->dirty_metadata_bytes -= buf->len; | ||
868 | else | ||
869 | WARN_ON(1); | ||
870 | spin_unlock(&root->fs_info->delalloc_lock); | ||
871 | } | ||
864 | 872 | ||
873 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | ||
874 | btrfs_set_lock_blocking(buf); | ||
865 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | 875 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
866 | buf); | 876 | buf); |
867 | } | 877 | } |
@@ -875,7 +885,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
875 | { | 885 | { |
876 | root->node = NULL; | 886 | root->node = NULL; |
877 | root->commit_root = NULL; | 887 | root->commit_root = NULL; |
878 | root->ref_tree = NULL; | ||
879 | root->sectorsize = sectorsize; | 888 | root->sectorsize = sectorsize; |
880 | root->nodesize = nodesize; | 889 | root->nodesize = nodesize; |
881 | root->leafsize = leafsize; | 890 | root->leafsize = leafsize; |
@@ -890,12 +899,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
890 | root->last_inode_alloc = 0; | 899 | root->last_inode_alloc = 0; |
891 | root->name = NULL; | 900 | root->name = NULL; |
892 | root->in_sysfs = 0; | 901 | root->in_sysfs = 0; |
902 | root->inode_tree.rb_node = NULL; | ||
893 | 903 | ||
894 | INIT_LIST_HEAD(&root->dirty_list); | 904 | INIT_LIST_HEAD(&root->dirty_list); |
895 | INIT_LIST_HEAD(&root->orphan_list); | 905 | INIT_LIST_HEAD(&root->orphan_list); |
896 | INIT_LIST_HEAD(&root->dead_list); | 906 | INIT_LIST_HEAD(&root->root_list); |
897 | spin_lock_init(&root->node_lock); | 907 | spin_lock_init(&root->node_lock); |
898 | spin_lock_init(&root->list_lock); | 908 | spin_lock_init(&root->list_lock); |
909 | spin_lock_init(&root->inode_lock); | ||
899 | mutex_init(&root->objectid_mutex); | 910 | mutex_init(&root->objectid_mutex); |
900 | mutex_init(&root->log_mutex); | 911 | mutex_init(&root->log_mutex); |
901 | init_waitqueue_head(&root->log_writer_wait); | 912 | init_waitqueue_head(&root->log_writer_wait); |
@@ -909,9 +920,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
909 | extent_io_tree_init(&root->dirty_log_pages, | 920 | extent_io_tree_init(&root->dirty_log_pages, |
910 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 921 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
911 | 922 | ||
912 | btrfs_leaf_ref_tree_init(&root->ref_tree_struct); | ||
913 | root->ref_tree = &root->ref_tree_struct; | ||
914 | |||
915 | memset(&root->root_key, 0, sizeof(root->root_key)); | 923 | memset(&root->root_key, 0, sizeof(root->root_key)); |
916 | memset(&root->root_item, 0, sizeof(root->root_item)); | 924 | memset(&root->root_item, 0, sizeof(root->root_item)); |
917 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); | 925 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); |
@@ -950,6 +958,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
950 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 958 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
951 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 959 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
952 | blocksize, generation); | 960 | blocksize, generation); |
961 | root->commit_root = btrfs_root_node(root); | ||
953 | BUG_ON(!root->node); | 962 | BUG_ON(!root->node); |
954 | return 0; | 963 | return 0; |
955 | } | 964 | } |
@@ -1016,20 +1025,19 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1016 | */ | 1025 | */ |
1017 | root->ref_cows = 0; | 1026 | root->ref_cows = 0; |
1018 | 1027 | ||
1019 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 1028 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
1020 | 0, BTRFS_TREE_LOG_OBJECTID, | 1029 | BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); |
1021 | trans->transid, 0, 0, 0); | ||
1022 | if (IS_ERR(leaf)) { | 1030 | if (IS_ERR(leaf)) { |
1023 | kfree(root); | 1031 | kfree(root); |
1024 | return ERR_CAST(leaf); | 1032 | return ERR_CAST(leaf); |
1025 | } | 1033 | } |
1026 | 1034 | ||
1035 | memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); | ||
1036 | btrfs_set_header_bytenr(leaf, leaf->start); | ||
1037 | btrfs_set_header_generation(leaf, trans->transid); | ||
1038 | btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); | ||
1039 | btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); | ||
1027 | root->node = leaf; | 1040 | root->node = leaf; |
1028 | btrfs_set_header_nritems(root->node, 0); | ||
1029 | btrfs_set_header_level(root->node, 0); | ||
1030 | btrfs_set_header_bytenr(root->node, root->node->start); | ||
1031 | btrfs_set_header_generation(root->node, trans->transid); | ||
1032 | btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID); | ||
1033 | 1041 | ||
1034 | write_extent_buffer(root->node, root->fs_info->fsid, | 1042 | write_extent_buffer(root->node, root->fs_info->fsid, |
1035 | (unsigned long)btrfs_header_fsid(root->node), | 1043 | (unsigned long)btrfs_header_fsid(root->node), |
@@ -1072,8 +1080,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
1072 | inode_item->nbytes = cpu_to_le64(root->leafsize); | 1080 | inode_item->nbytes = cpu_to_le64(root->leafsize); |
1073 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); | 1081 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); |
1074 | 1082 | ||
1075 | btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); | 1083 | btrfs_set_root_node(&log_root->root_item, log_root->node); |
1076 | btrfs_set_root_generation(&log_root->root_item, trans->transid); | ||
1077 | 1084 | ||
1078 | WARN_ON(root->log_root); | 1085 | WARN_ON(root->log_root); |
1079 | root->log_root = log_root; | 1086 | root->log_root = log_root; |
@@ -1135,6 +1142,7 @@ out: | |||
1135 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1142 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
1136 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1143 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1137 | blocksize, generation); | 1144 | blocksize, generation); |
1145 | root->commit_root = btrfs_root_node(root); | ||
1138 | BUG_ON(!root->node); | 1146 | BUG_ON(!root->node); |
1139 | insert: | 1147 | insert: |
1140 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | 1148 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { |
@@ -1201,7 +1209,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
1201 | } | 1209 | } |
1202 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 1210 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
1203 | ret = btrfs_find_dead_roots(fs_info->tree_root, | 1211 | ret = btrfs_find_dead_roots(fs_info->tree_root, |
1204 | root->root_key.objectid, root); | 1212 | root->root_key.objectid); |
1205 | BUG_ON(ret); | 1213 | BUG_ON(ret); |
1206 | btrfs_orphan_cleanup(root); | 1214 | btrfs_orphan_cleanup(root); |
1207 | } | 1215 | } |
@@ -1247,11 +1255,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1247 | int ret = 0; | 1255 | int ret = 0; |
1248 | struct btrfs_device *device; | 1256 | struct btrfs_device *device; |
1249 | struct backing_dev_info *bdi; | 1257 | struct backing_dev_info *bdi; |
1250 | #if 0 | 1258 | |
1251 | if ((bdi_bits & (1 << BDI_write_congested)) && | ||
1252 | btrfs_congested_async(info, 0)) | ||
1253 | return 1; | ||
1254 | #endif | ||
1255 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { | 1259 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { |
1256 | if (!device->bdev) | 1260 | if (!device->bdev) |
1257 | continue; | 1261 | continue; |
@@ -1340,12 +1344,25 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | |||
1340 | free_extent_map(em); | 1344 | free_extent_map(em); |
1341 | } | 1345 | } |
1342 | 1346 | ||
1347 | /* | ||
1348 | * If this fails, caller must call bdi_destroy() to get rid of the | ||
1349 | * bdi again. | ||
1350 | */ | ||
1343 | static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | 1351 | static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) |
1344 | { | 1352 | { |
1345 | bdi_init(bdi); | 1353 | int err; |
1354 | |||
1355 | bdi->capabilities = BDI_CAP_MAP_COPY; | ||
1356 | err = bdi_init(bdi); | ||
1357 | if (err) | ||
1358 | return err; | ||
1359 | |||
1360 | err = bdi_register(bdi, NULL, "btrfs-%d", | ||
1361 | atomic_inc_return(&btrfs_bdi_num)); | ||
1362 | if (err) | ||
1363 | return err; | ||
1364 | |||
1346 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1365 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
1347 | bdi->state = 0; | ||
1348 | bdi->capabilities = default_backing_dev_info.capabilities; | ||
1349 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | 1366 | bdi->unplug_io_fn = btrfs_unplug_io_fn; |
1350 | bdi->unplug_io_data = info; | 1367 | bdi->unplug_io_data = info; |
1351 | bdi->congested_fn = btrfs_congested_fn; | 1368 | bdi->congested_fn = btrfs_congested_fn; |
@@ -1387,8 +1404,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1387 | 1404 | ||
1388 | ret = extent_range_uptodate(io_tree, start + length, | 1405 | ret = extent_range_uptodate(io_tree, start + length, |
1389 | start + buf_len - 1); | 1406 | start + buf_len - 1); |
1390 | if (ret == 1) | ||
1391 | return ret; | ||
1392 | return ret; | 1407 | return ret; |
1393 | } | 1408 | } |
1394 | 1409 | ||
@@ -1471,12 +1486,6 @@ static int transaction_kthread(void *arg) | |||
1471 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1486 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1472 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1487 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1473 | 1488 | ||
1474 | if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { | ||
1475 | printk(KERN_INFO "btrfs: total reference cache " | ||
1476 | "size %llu\n", | ||
1477 | root->fs_info->total_ref_cache_size); | ||
1478 | } | ||
1479 | |||
1480 | mutex_lock(&root->fs_info->trans_mutex); | 1489 | mutex_lock(&root->fs_info->trans_mutex); |
1481 | cur = root->fs_info->running_transaction; | 1490 | cur = root->fs_info->running_transaction; |
1482 | if (!cur) { | 1491 | if (!cur) { |
@@ -1493,6 +1502,7 @@ static int transaction_kthread(void *arg) | |||
1493 | mutex_unlock(&root->fs_info->trans_mutex); | 1502 | mutex_unlock(&root->fs_info->trans_mutex); |
1494 | trans = btrfs_start_transaction(root, 1); | 1503 | trans = btrfs_start_transaction(root, 1); |
1495 | ret = btrfs_commit_transaction(trans, root); | 1504 | ret = btrfs_commit_transaction(trans, root); |
1505 | |||
1496 | sleep: | 1506 | sleep: |
1497 | wake_up_process(root->fs_info->cleaner_kthread); | 1507 | wake_up_process(root->fs_info->cleaner_kthread); |
1498 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1508 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1552,6 +1562,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1552 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1562 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1553 | INIT_LIST_HEAD(&fs_info->hashers); | 1563 | INIT_LIST_HEAD(&fs_info->hashers); |
1554 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1564 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1565 | INIT_LIST_HEAD(&fs_info->ordered_operations); | ||
1555 | spin_lock_init(&fs_info->delalloc_lock); | 1566 | spin_lock_init(&fs_info->delalloc_lock); |
1556 | spin_lock_init(&fs_info->new_trans_lock); | 1567 | spin_lock_init(&fs_info->new_trans_lock); |
1557 | spin_lock_init(&fs_info->ref_cache_lock); | 1568 | spin_lock_init(&fs_info->ref_cache_lock); |
@@ -1570,15 +1581,15 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1570 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1581 | atomic_set(&fs_info->async_delalloc_pages, 0); |
1571 | atomic_set(&fs_info->async_submit_draining, 0); | 1582 | atomic_set(&fs_info->async_submit_draining, 0); |
1572 | atomic_set(&fs_info->nr_async_bios, 0); | 1583 | atomic_set(&fs_info->nr_async_bios, 0); |
1573 | atomic_set(&fs_info->throttles, 0); | ||
1574 | atomic_set(&fs_info->throttle_gen, 0); | ||
1575 | fs_info->sb = sb; | 1584 | fs_info->sb = sb; |
1576 | fs_info->max_extent = (u64)-1; | 1585 | fs_info->max_extent = (u64)-1; |
1577 | fs_info->max_inline = 8192 * 1024; | 1586 | fs_info->max_inline = 8192 * 1024; |
1578 | setup_bdi(fs_info, &fs_info->bdi); | 1587 | if (setup_bdi(fs_info, &fs_info->bdi)) |
1588 | goto fail_bdi; | ||
1579 | fs_info->btree_inode = new_inode(sb); | 1589 | fs_info->btree_inode = new_inode(sb); |
1580 | fs_info->btree_inode->i_ino = 1; | 1590 | fs_info->btree_inode->i_ino = 1; |
1581 | fs_info->btree_inode->i_nlink = 1; | 1591 | fs_info->btree_inode->i_nlink = 1; |
1592 | fs_info->metadata_ratio = 8; | ||
1582 | 1593 | ||
1583 | fs_info->thread_pool_size = min_t(unsigned long, | 1594 | fs_info->thread_pool_size = min_t(unsigned long, |
1584 | num_online_cpus() + 2, 8); | 1595 | num_online_cpus() + 2, 8); |
@@ -1598,6 +1609,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1598 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; | 1609 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; |
1599 | fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; | 1610 | fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; |
1600 | 1611 | ||
1612 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); | ||
1601 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, | 1613 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, |
1602 | fs_info->btree_inode->i_mapping, | 1614 | fs_info->btree_inode->i_mapping, |
1603 | GFP_NOFS); | 1615 | GFP_NOFS); |
@@ -1611,31 +1623,27 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1611 | 1623 | ||
1612 | extent_io_tree_init(&fs_info->pinned_extents, | 1624 | extent_io_tree_init(&fs_info->pinned_extents, |
1613 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1625 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1614 | extent_io_tree_init(&fs_info->pending_del, | ||
1615 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1616 | extent_io_tree_init(&fs_info->extent_ins, | ||
1617 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1618 | fs_info->do_barriers = 1; | 1626 | fs_info->do_barriers = 1; |
1619 | 1627 | ||
1620 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | ||
1621 | btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); | ||
1622 | btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); | ||
1623 | |||
1624 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | 1628 | BTRFS_I(fs_info->btree_inode)->root = tree_root; |
1625 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | 1629 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, |
1626 | sizeof(struct btrfs_key)); | 1630 | sizeof(struct btrfs_key)); |
1627 | insert_inode_hash(fs_info->btree_inode); | 1631 | insert_inode_hash(fs_info->btree_inode); |
1628 | 1632 | ||
1629 | mutex_init(&fs_info->trans_mutex); | 1633 | mutex_init(&fs_info->trans_mutex); |
1634 | mutex_init(&fs_info->ordered_operations_mutex); | ||
1630 | mutex_init(&fs_info->tree_log_mutex); | 1635 | mutex_init(&fs_info->tree_log_mutex); |
1631 | mutex_init(&fs_info->drop_mutex); | 1636 | mutex_init(&fs_info->drop_mutex); |
1632 | mutex_init(&fs_info->extent_ins_mutex); | ||
1633 | mutex_init(&fs_info->pinned_mutex); | ||
1634 | mutex_init(&fs_info->chunk_mutex); | 1637 | mutex_init(&fs_info->chunk_mutex); |
1635 | mutex_init(&fs_info->transaction_kthread_mutex); | 1638 | mutex_init(&fs_info->transaction_kthread_mutex); |
1636 | mutex_init(&fs_info->cleaner_mutex); | 1639 | mutex_init(&fs_info->cleaner_mutex); |
1637 | mutex_init(&fs_info->volume_mutex); | 1640 | mutex_init(&fs_info->volume_mutex); |
1638 | mutex_init(&fs_info->tree_reloc_mutex); | 1641 | mutex_init(&fs_info->tree_reloc_mutex); |
1642 | init_rwsem(&fs_info->extent_commit_sem); | ||
1643 | |||
1644 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | ||
1645 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | ||
1646 | |||
1639 | init_waitqueue_head(&fs_info->transaction_throttle); | 1647 | init_waitqueue_head(&fs_info->transaction_throttle); |
1640 | init_waitqueue_head(&fs_info->transaction_wait); | 1648 | init_waitqueue_head(&fs_info->transaction_wait); |
1641 | init_waitqueue_head(&fs_info->async_submit_wait); | 1649 | init_waitqueue_head(&fs_info->async_submit_wait); |
@@ -1670,17 +1678,23 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1670 | if (features) { | 1678 | if (features) { |
1671 | printk(KERN_ERR "BTRFS: couldn't mount because of " | 1679 | printk(KERN_ERR "BTRFS: couldn't mount because of " |
1672 | "unsupported optional features (%Lx).\n", | 1680 | "unsupported optional features (%Lx).\n", |
1673 | features); | 1681 | (unsigned long long)features); |
1674 | err = -EINVAL; | 1682 | err = -EINVAL; |
1675 | goto fail_iput; | 1683 | goto fail_iput; |
1676 | } | 1684 | } |
1677 | 1685 | ||
1686 | features = btrfs_super_incompat_flags(disk_super); | ||
1687 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | ||
1688 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | ||
1689 | btrfs_set_super_incompat_flags(disk_super, features); | ||
1690 | } | ||
1691 | |||
1678 | features = btrfs_super_compat_ro_flags(disk_super) & | 1692 | features = btrfs_super_compat_ro_flags(disk_super) & |
1679 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1693 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
1680 | if (!(sb->s_flags & MS_RDONLY) && features) { | 1694 | if (!(sb->s_flags & MS_RDONLY) && features) { |
1681 | printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " | 1695 | printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " |
1682 | "unsupported option features (%Lx).\n", | 1696 | "unsupported option features (%Lx).\n", |
1683 | features); | 1697 | (unsigned long long)features); |
1684 | err = -EINVAL; | 1698 | err = -EINVAL; |
1685 | goto fail_iput; | 1699 | goto fail_iput; |
1686 | } | 1700 | } |
@@ -1772,7 +1786,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1772 | if (ret) { | 1786 | if (ret) { |
1773 | printk(KERN_WARNING "btrfs: failed to read the system " | 1787 | printk(KERN_WARNING "btrfs: failed to read the system " |
1774 | "array on %s\n", sb->s_id); | 1788 | "array on %s\n", sb->s_id); |
1775 | goto fail_sys_array; | 1789 | goto fail_sb_buffer; |
1776 | } | 1790 | } |
1777 | 1791 | ||
1778 | blocksize = btrfs_level_size(tree_root, | 1792 | blocksize = btrfs_level_size(tree_root, |
@@ -1786,6 +1800,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1786 | btrfs_super_chunk_root(disk_super), | 1800 | btrfs_super_chunk_root(disk_super), |
1787 | blocksize, generation); | 1801 | blocksize, generation); |
1788 | BUG_ON(!chunk_root->node); | 1802 | BUG_ON(!chunk_root->node); |
1803 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { | ||
1804 | printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", | ||
1805 | sb->s_id); | ||
1806 | goto fail_chunk_root; | ||
1807 | } | ||
1808 | btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); | ||
1809 | chunk_root->commit_root = btrfs_root_node(chunk_root); | ||
1789 | 1810 | ||
1790 | read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, | 1811 | read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, |
1791 | (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), | 1812 | (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), |
@@ -1811,7 +1832,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1811 | blocksize, generation); | 1832 | blocksize, generation); |
1812 | if (!tree_root->node) | 1833 | if (!tree_root->node) |
1813 | goto fail_chunk_root; | 1834 | goto fail_chunk_root; |
1814 | 1835 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { | |
1836 | printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", | ||
1837 | sb->s_id); | ||
1838 | goto fail_tree_root; | ||
1839 | } | ||
1840 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); | ||
1841 | tree_root->commit_root = btrfs_root_node(tree_root); | ||
1815 | 1842 | ||
1816 | ret = find_and_setup_root(tree_root, fs_info, | 1843 | ret = find_and_setup_root(tree_root, fs_info, |
1817 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); | 1844 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); |
@@ -1821,14 +1848,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1821 | 1848 | ||
1822 | ret = find_and_setup_root(tree_root, fs_info, | 1849 | ret = find_and_setup_root(tree_root, fs_info, |
1823 | BTRFS_DEV_TREE_OBJECTID, dev_root); | 1850 | BTRFS_DEV_TREE_OBJECTID, dev_root); |
1824 | dev_root->track_dirty = 1; | ||
1825 | if (ret) | 1851 | if (ret) |
1826 | goto fail_extent_root; | 1852 | goto fail_extent_root; |
1853 | dev_root->track_dirty = 1; | ||
1827 | 1854 | ||
1828 | ret = find_and_setup_root(tree_root, fs_info, | 1855 | ret = find_and_setup_root(tree_root, fs_info, |
1829 | BTRFS_CSUM_TREE_OBJECTID, csum_root); | 1856 | BTRFS_CSUM_TREE_OBJECTID, csum_root); |
1830 | if (ret) | 1857 | if (ret) |
1831 | goto fail_extent_root; | 1858 | goto fail_dev_root; |
1832 | 1859 | ||
1833 | csum_root->track_dirty = 1; | 1860 | csum_root->track_dirty = 1; |
1834 | 1861 | ||
@@ -1850,6 +1877,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1850 | if (IS_ERR(fs_info->transaction_kthread)) | 1877 | if (IS_ERR(fs_info->transaction_kthread)) |
1851 | goto fail_cleaner; | 1878 | goto fail_cleaner; |
1852 | 1879 | ||
1880 | if (!btrfs_test_opt(tree_root, SSD) && | ||
1881 | !btrfs_test_opt(tree_root, NOSSD) && | ||
1882 | !fs_info->fs_devices->rotating) { | ||
1883 | printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD " | ||
1884 | "mode\n"); | ||
1885 | btrfs_set_opt(fs_info->mount_opt, SSD); | ||
1886 | } | ||
1887 | |||
1853 | if (btrfs_super_log_root(disk_super) != 0) { | 1888 | if (btrfs_super_log_root(disk_super) != 0) { |
1854 | u64 bytenr = btrfs_super_log_root(disk_super); | 1889 | u64 bytenr = btrfs_super_log_root(disk_super); |
1855 | 1890 | ||
@@ -1882,7 +1917,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1882 | } | 1917 | } |
1883 | 1918 | ||
1884 | if (!(sb->s_flags & MS_RDONLY)) { | 1919 | if (!(sb->s_flags & MS_RDONLY)) { |
1885 | ret = btrfs_cleanup_reloc_trees(tree_root); | 1920 | ret = btrfs_recover_relocation(tree_root); |
1886 | BUG_ON(ret); | 1921 | BUG_ON(ret); |
1887 | } | 1922 | } |
1888 | 1923 | ||
@@ -1893,6 +1928,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1893 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | 1928 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); |
1894 | if (!fs_info->fs_root) | 1929 | if (!fs_info->fs_root) |
1895 | goto fail_trans_kthread; | 1930 | goto fail_trans_kthread; |
1931 | |||
1896 | return tree_root; | 1932 | return tree_root; |
1897 | 1933 | ||
1898 | fail_trans_kthread: | 1934 | fail_trans_kthread: |
@@ -1909,14 +1945,19 @@ fail_cleaner: | |||
1909 | 1945 | ||
1910 | fail_csum_root: | 1946 | fail_csum_root: |
1911 | free_extent_buffer(csum_root->node); | 1947 | free_extent_buffer(csum_root->node); |
1948 | free_extent_buffer(csum_root->commit_root); | ||
1949 | fail_dev_root: | ||
1950 | free_extent_buffer(dev_root->node); | ||
1951 | free_extent_buffer(dev_root->commit_root); | ||
1912 | fail_extent_root: | 1952 | fail_extent_root: |
1913 | free_extent_buffer(extent_root->node); | 1953 | free_extent_buffer(extent_root->node); |
1954 | free_extent_buffer(extent_root->commit_root); | ||
1914 | fail_tree_root: | 1955 | fail_tree_root: |
1915 | free_extent_buffer(tree_root->node); | 1956 | free_extent_buffer(tree_root->node); |
1957 | free_extent_buffer(tree_root->commit_root); | ||
1916 | fail_chunk_root: | 1958 | fail_chunk_root: |
1917 | free_extent_buffer(chunk_root->node); | 1959 | free_extent_buffer(chunk_root->node); |
1918 | fail_sys_array: | 1960 | free_extent_buffer(chunk_root->commit_root); |
1919 | free_extent_buffer(dev_root->node); | ||
1920 | fail_sb_buffer: | 1961 | fail_sb_buffer: |
1921 | btrfs_stop_workers(&fs_info->fixup_workers); | 1962 | btrfs_stop_workers(&fs_info->fixup_workers); |
1922 | btrfs_stop_workers(&fs_info->delalloc_workers); | 1963 | btrfs_stop_workers(&fs_info->delalloc_workers); |
@@ -1932,8 +1973,8 @@ fail_iput: | |||
1932 | 1973 | ||
1933 | btrfs_close_devices(fs_info->fs_devices); | 1974 | btrfs_close_devices(fs_info->fs_devices); |
1934 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 1975 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
1976 | fail_bdi: | ||
1935 | bdi_destroy(&fs_info->bdi); | 1977 | bdi_destroy(&fs_info->bdi); |
1936 | |||
1937 | fail: | 1978 | fail: |
1938 | kfree(extent_root); | 1979 | kfree(extent_root); |
1939 | kfree(tree_root); | 1980 | kfree(tree_root); |
@@ -2006,6 +2047,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) | |||
2006 | return latest; | 2047 | return latest; |
2007 | } | 2048 | } |
2008 | 2049 | ||
2050 | /* | ||
2051 | * this should be called twice, once with wait == 0 and | ||
2052 | * once with wait == 1. When wait == 0 is done, all the buffer heads | ||
2053 | * we write are pinned. | ||
2054 | * | ||
2055 | * They are released when wait == 1 is done. | ||
2056 | * max_mirrors must be the same for both runs, and it indicates how | ||
2057 | * many supers on this one device should be written. | ||
2058 | * | ||
2059 | * max_mirrors == 0 means to write them all. | ||
2060 | */ | ||
2009 | static int write_dev_supers(struct btrfs_device *device, | 2061 | static int write_dev_supers(struct btrfs_device *device, |
2010 | struct btrfs_super_block *sb, | 2062 | struct btrfs_super_block *sb, |
2011 | int do_barriers, int wait, int max_mirrors) | 2063 | int do_barriers, int wait, int max_mirrors) |
@@ -2041,12 +2093,16 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2041 | bh = __find_get_block(device->bdev, bytenr / 4096, | 2093 | bh = __find_get_block(device->bdev, bytenr / 4096, |
2042 | BTRFS_SUPER_INFO_SIZE); | 2094 | BTRFS_SUPER_INFO_SIZE); |
2043 | BUG_ON(!bh); | 2095 | BUG_ON(!bh); |
2044 | brelse(bh); | ||
2045 | wait_on_buffer(bh); | 2096 | wait_on_buffer(bh); |
2046 | if (buffer_uptodate(bh)) { | 2097 | if (!buffer_uptodate(bh)) |
2047 | brelse(bh); | 2098 | errors++; |
2048 | continue; | 2099 | |
2049 | } | 2100 | /* drop our reference */ |
2101 | brelse(bh); | ||
2102 | |||
2103 | /* drop the reference from the wait == 0 run */ | ||
2104 | brelse(bh); | ||
2105 | continue; | ||
2050 | } else { | 2106 | } else { |
2051 | btrfs_set_super_bytenr(sb, bytenr); | 2107 | btrfs_set_super_bytenr(sb, bytenr); |
2052 | 2108 | ||
@@ -2057,12 +2113,18 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2057 | BTRFS_CSUM_SIZE); | 2113 | BTRFS_CSUM_SIZE); |
2058 | btrfs_csum_final(crc, sb->csum); | 2114 | btrfs_csum_final(crc, sb->csum); |
2059 | 2115 | ||
2116 | /* | ||
2117 | * one reference for us, and we leave it for the | ||
2118 | * caller | ||
2119 | */ | ||
2060 | bh = __getblk(device->bdev, bytenr / 4096, | 2120 | bh = __getblk(device->bdev, bytenr / 4096, |
2061 | BTRFS_SUPER_INFO_SIZE); | 2121 | BTRFS_SUPER_INFO_SIZE); |
2062 | memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); | 2122 | memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); |
2063 | 2123 | ||
2064 | set_buffer_uptodate(bh); | 2124 | /* one reference for submit_bh */ |
2065 | get_bh(bh); | 2125 | get_bh(bh); |
2126 | |||
2127 | set_buffer_uptodate(bh); | ||
2066 | lock_buffer(bh); | 2128 | lock_buffer(bh); |
2067 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2129 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2068 | } | 2130 | } |
@@ -2074,30 +2136,24 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2074 | device->name); | 2136 | device->name); |
2075 | set_buffer_uptodate(bh); | 2137 | set_buffer_uptodate(bh); |
2076 | device->barriers = 0; | 2138 | device->barriers = 0; |
2139 | /* one reference for submit_bh */ | ||
2077 | get_bh(bh); | 2140 | get_bh(bh); |
2078 | lock_buffer(bh); | 2141 | lock_buffer(bh); |
2079 | ret = submit_bh(WRITE, bh); | 2142 | ret = submit_bh(WRITE_SYNC, bh); |
2080 | } | 2143 | } |
2081 | } else { | 2144 | } else { |
2082 | ret = submit_bh(WRITE, bh); | 2145 | ret = submit_bh(WRITE_SYNC, bh); |
2083 | } | 2146 | } |
2084 | 2147 | ||
2085 | if (!ret && wait) { | 2148 | if (ret) |
2086 | wait_on_buffer(bh); | ||
2087 | if (!buffer_uptodate(bh)) | ||
2088 | errors++; | ||
2089 | } else if (ret) { | ||
2090 | errors++; | 2149 | errors++; |
2091 | } | ||
2092 | if (wait) | ||
2093 | brelse(bh); | ||
2094 | } | 2150 | } |
2095 | return errors < i ? 0 : -1; | 2151 | return errors < i ? 0 : -1; |
2096 | } | 2152 | } |
2097 | 2153 | ||
2098 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | 2154 | int write_all_supers(struct btrfs_root *root, int max_mirrors) |
2099 | { | 2155 | { |
2100 | struct list_head *head = &root->fs_info->fs_devices->devices; | 2156 | struct list_head *head; |
2101 | struct btrfs_device *dev; | 2157 | struct btrfs_device *dev; |
2102 | struct btrfs_super_block *sb; | 2158 | struct btrfs_super_block *sb; |
2103 | struct btrfs_dev_item *dev_item; | 2159 | struct btrfs_dev_item *dev_item; |
@@ -2112,6 +2168,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2112 | 2168 | ||
2113 | sb = &root->fs_info->super_for_commit; | 2169 | sb = &root->fs_info->super_for_commit; |
2114 | dev_item = &sb->dev_item; | 2170 | dev_item = &sb->dev_item; |
2171 | |||
2172 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
2173 | head = &root->fs_info->fs_devices->devices; | ||
2115 | list_for_each_entry(dev, head, dev_list) { | 2174 | list_for_each_entry(dev, head, dev_list) { |
2116 | if (!dev->bdev) { | 2175 | if (!dev->bdev) { |
2117 | total_errors++; | 2176 | total_errors++; |
@@ -2155,6 +2214,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2155 | if (ret) | 2214 | if (ret) |
2156 | total_errors++; | 2215 | total_errors++; |
2157 | } | 2216 | } |
2217 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
2158 | if (total_errors > max_errors) { | 2218 | if (total_errors > max_errors) { |
2159 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", | 2219 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", |
2160 | total_errors); | 2220 | total_errors); |
@@ -2174,6 +2234,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
2174 | 2234 | ||
2175 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2235 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) |
2176 | { | 2236 | { |
2237 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | ||
2177 | radix_tree_delete(&fs_info->fs_roots_radix, | 2238 | radix_tree_delete(&fs_info->fs_roots_radix, |
2178 | (unsigned long)root->root_key.objectid); | 2239 | (unsigned long)root->root_key.objectid); |
2179 | if (root->anon_super.s_dev) { | 2240 | if (root->anon_super.s_dev) { |
@@ -2220,10 +2281,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
2220 | ARRAY_SIZE(gang)); | 2281 | ARRAY_SIZE(gang)); |
2221 | if (!ret) | 2282 | if (!ret) |
2222 | break; | 2283 | break; |
2284 | |||
2285 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | ||
2223 | for (i = 0; i < ret; i++) { | 2286 | for (i = 0; i < ret; i++) { |
2224 | root_objectid = gang[i]->root_key.objectid; | 2287 | root_objectid = gang[i]->root_key.objectid; |
2225 | ret = btrfs_find_dead_roots(fs_info->tree_root, | 2288 | ret = btrfs_find_dead_roots(fs_info->tree_root, |
2226 | root_objectid, gang[i]); | 2289 | root_objectid); |
2227 | BUG_ON(ret); | 2290 | BUG_ON(ret); |
2228 | btrfs_orphan_cleanup(gang[i]); | 2291 | btrfs_orphan_cleanup(gang[i]); |
2229 | } | 2292 | } |
@@ -2270,31 +2333,31 @@ int close_ctree(struct btrfs_root *root) | |||
2270 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2333 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2271 | } | 2334 | } |
2272 | 2335 | ||
2336 | fs_info->closing = 2; | ||
2337 | smp_mb(); | ||
2338 | |||
2273 | if (fs_info->delalloc_bytes) { | 2339 | if (fs_info->delalloc_bytes) { |
2274 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 2340 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", |
2275 | fs_info->delalloc_bytes); | 2341 | (unsigned long long)fs_info->delalloc_bytes); |
2276 | } | 2342 | } |
2277 | if (fs_info->total_ref_cache_size) { | 2343 | if (fs_info->total_ref_cache_size) { |
2278 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", | 2344 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", |
2279 | (unsigned long long)fs_info->total_ref_cache_size); | 2345 | (unsigned long long)fs_info->total_ref_cache_size); |
2280 | } | 2346 | } |
2281 | 2347 | ||
2282 | if (fs_info->extent_root->node) | 2348 | free_extent_buffer(fs_info->extent_root->node); |
2283 | free_extent_buffer(fs_info->extent_root->node); | 2349 | free_extent_buffer(fs_info->extent_root->commit_root); |
2284 | 2350 | free_extent_buffer(fs_info->tree_root->node); | |
2285 | if (fs_info->tree_root->node) | 2351 | free_extent_buffer(fs_info->tree_root->commit_root); |
2286 | free_extent_buffer(fs_info->tree_root->node); | 2352 | free_extent_buffer(root->fs_info->chunk_root->node); |
2287 | 2353 | free_extent_buffer(root->fs_info->chunk_root->commit_root); | |
2288 | if (root->fs_info->chunk_root->node) | 2354 | free_extent_buffer(root->fs_info->dev_root->node); |
2289 | free_extent_buffer(root->fs_info->chunk_root->node); | 2355 | free_extent_buffer(root->fs_info->dev_root->commit_root); |
2290 | 2356 | free_extent_buffer(root->fs_info->csum_root->node); | |
2291 | if (root->fs_info->dev_root->node) | 2357 | free_extent_buffer(root->fs_info->csum_root->commit_root); |
2292 | free_extent_buffer(root->fs_info->dev_root->node); | ||
2293 | |||
2294 | if (root->fs_info->csum_root->node) | ||
2295 | free_extent_buffer(root->fs_info->csum_root->node); | ||
2296 | 2358 | ||
2297 | btrfs_free_block_groups(root->fs_info); | 2359 | btrfs_free_block_groups(root->fs_info); |
2360 | btrfs_free_pinned_extents(root->fs_info); | ||
2298 | 2361 | ||
2299 | del_fs_roots(fs_info); | 2362 | del_fs_roots(fs_info); |
2300 | 2363 | ||
@@ -2309,16 +2372,6 @@ int close_ctree(struct btrfs_root *root) | |||
2309 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2372 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2310 | btrfs_stop_workers(&fs_info->submit_workers); | 2373 | btrfs_stop_workers(&fs_info->submit_workers); |
2311 | 2374 | ||
2312 | #if 0 | ||
2313 | while (!list_empty(&fs_info->hashers)) { | ||
2314 | struct btrfs_hasher *hasher; | ||
2315 | hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, | ||
2316 | hashers); | ||
2317 | list_del(&hasher->hashers); | ||
2318 | crypto_free_hash(&fs_info->hash_tfm); | ||
2319 | kfree(hasher); | ||
2320 | } | ||
2321 | #endif | ||
2322 | btrfs_close_devices(fs_info->fs_devices); | 2375 | btrfs_close_devices(fs_info->fs_devices); |
2323 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2376 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
2324 | 2377 | ||
@@ -2358,8 +2411,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2358 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | 2411 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; |
2359 | u64 transid = btrfs_header_generation(buf); | 2412 | u64 transid = btrfs_header_generation(buf); |
2360 | struct inode *btree_inode = root->fs_info->btree_inode; | 2413 | struct inode *btree_inode = root->fs_info->btree_inode; |
2361 | 2414 | int was_dirty; | |
2362 | btrfs_set_lock_blocking(buf); | ||
2363 | 2415 | ||
2364 | btrfs_assert_tree_locked(buf); | 2416 | btrfs_assert_tree_locked(buf); |
2365 | if (transid != root->fs_info->generation) { | 2417 | if (transid != root->fs_info->generation) { |
@@ -2370,7 +2422,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2370 | (unsigned long long)root->fs_info->generation); | 2422 | (unsigned long long)root->fs_info->generation); |
2371 | WARN_ON(1); | 2423 | WARN_ON(1); |
2372 | } | 2424 | } |
2373 | set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); | 2425 | was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
2426 | buf); | ||
2427 | if (!was_dirty) { | ||
2428 | spin_lock(&root->fs_info->delalloc_lock); | ||
2429 | root->fs_info->dirty_metadata_bytes += buf->len; | ||
2430 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2431 | } | ||
2374 | } | 2432 | } |
2375 | 2433 | ||
2376 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 2434 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) |
@@ -2379,17 +2437,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
2379 | * looks as though older kernels can get into trouble with | 2437 | * looks as though older kernels can get into trouble with |
2380 | * this code, they end up stuck in balance_dirty_pages forever | 2438 | * this code, they end up stuck in balance_dirty_pages forever |
2381 | */ | 2439 | */ |
2382 | struct extent_io_tree *tree; | ||
2383 | u64 num_dirty; | 2440 | u64 num_dirty; |
2384 | u64 start = 0; | ||
2385 | unsigned long thresh = 32 * 1024 * 1024; | 2441 | unsigned long thresh = 32 * 1024 * 1024; |
2386 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | ||
2387 | 2442 | ||
2388 | if (current_is_pdflush() || current->flags & PF_MEMALLOC) | 2443 | if (current->flags & PF_MEMALLOC) |
2389 | return; | 2444 | return; |
2390 | 2445 | ||
2391 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 2446 | num_dirty = root->fs_info->dirty_metadata_bytes; |
2392 | thresh, EXTENT_DIRTY); | 2447 | |
2393 | if (num_dirty > thresh) { | 2448 | if (num_dirty > thresh) { |
2394 | balance_dirty_pages_ratelimited_nr( | 2449 | balance_dirty_pages_ratelimited_nr( |
2395 | root->fs_info->btree_inode->i_mapping, 1); | 2450 | root->fs_info->btree_inode->i_mapping, 1); |
@@ -2410,6 +2465,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
2410 | int btree_lock_page_hook(struct page *page) | 2465 | int btree_lock_page_hook(struct page *page) |
2411 | { | 2466 | { |
2412 | struct inode *inode = page->mapping->host; | 2467 | struct inode *inode = page->mapping->host; |
2468 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2413 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 2469 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2414 | struct extent_buffer *eb; | 2470 | struct extent_buffer *eb; |
2415 | unsigned long len; | 2471 | unsigned long len; |
@@ -2425,6 +2481,16 @@ int btree_lock_page_hook(struct page *page) | |||
2425 | 2481 | ||
2426 | btrfs_tree_lock(eb); | 2482 | btrfs_tree_lock(eb); |
2427 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 2483 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
2484 | |||
2485 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
2486 | spin_lock(&root->fs_info->delalloc_lock); | ||
2487 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
2488 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
2489 | else | ||
2490 | WARN_ON(1); | ||
2491 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2492 | } | ||
2493 | |||
2428 | btrfs_tree_unlock(eb); | 2494 | btrfs_tree_unlock(eb); |
2429 | free_extent_buffer(eb); | 2495 | free_extent_buffer(eb); |
2430 | out: | 2496 | out: |