aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c400
1 files changed, 233 insertions, 167 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3e18175248e0..e83be2e4602c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -26,8 +26,8 @@
26#include <linux/workqueue.h> 26#include <linux/workqueue.h>
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <linux/freezer.h> 28#include <linux/freezer.h>
29#include <linux/crc32c.h>
29#include "compat.h" 30#include "compat.h"
30#include "crc32c.h"
31#include "ctree.h" 31#include "ctree.h"
32#include "disk-io.h" 32#include "disk-io.h"
33#include "transaction.h" 33#include "transaction.h"
@@ -36,12 +36,14 @@
36#include "print-tree.h" 36#include "print-tree.h"
37#include "async-thread.h" 37#include "async-thread.h"
38#include "locking.h" 38#include "locking.h"
39#include "ref-cache.h"
40#include "tree-log.h" 39#include "tree-log.h"
40#include "free-space-cache.h"
41 41
42static struct extent_io_ops btree_extent_io_ops; 42static struct extent_io_ops btree_extent_io_ops;
43static void end_workqueue_fn(struct btrfs_work *work); 43static void end_workqueue_fn(struct btrfs_work *work);
44 44
45static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
46
45/* 47/*
46 * end_io_wq structs are used to do processing in task context when an IO is 48 * end_io_wq structs are used to do processing in task context when an IO is
47 * complete. This is used during reads to verify checksums, and it is used 49 * complete. This is used during reads to verify checksums, and it is used
@@ -171,7 +173,7 @@ out:
171 173
172u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) 174u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
173{ 175{
174 return btrfs_crc32c(seed, data, len); 176 return crc32c(seed, data, len);
175} 177}
176 178
177void btrfs_csum_final(u32 crc, char *result) 179void btrfs_csum_final(u32 crc, char *result)
@@ -231,10 +233,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
231 memcpy(&found, result, csum_size); 233 memcpy(&found, result, csum_size);
232 234
233 read_extent_buffer(buf, &val, 0, csum_size); 235 read_extent_buffer(buf, &val, 0, csum_size);
234 printk(KERN_INFO "btrfs: %s checksum verify failed " 236 if (printk_ratelimit()) {
235 "on %llu wanted %X found %X level %d\n", 237 printk(KERN_INFO "btrfs: %s checksum verify "
236 root->fs_info->sb->s_id, 238 "failed on %llu wanted %X found %X "
237 buf->start, val, found, btrfs_header_level(buf)); 239 "level %d\n",
240 root->fs_info->sb->s_id,
241 (unsigned long long)buf->start, val, found,
242 btrfs_header_level(buf));
243 }
238 if (result != (char *)&inline_result) 244 if (result != (char *)&inline_result)
239 kfree(result); 245 kfree(result);
240 return 1; 246 return 1;
@@ -267,10 +273,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
267 ret = 0; 273 ret = 0;
268 goto out; 274 goto out;
269 } 275 }
270 printk("parent transid verify failed on %llu wanted %llu found %llu\n", 276 if (printk_ratelimit()) {
271 (unsigned long long)eb->start, 277 printk("parent transid verify failed on %llu wanted %llu "
272 (unsigned long long)parent_transid, 278 "found %llu\n",
273 (unsigned long long)btrfs_header_generation(eb)); 279 (unsigned long long)eb->start,
280 (unsigned long long)parent_transid,
281 (unsigned long long)btrfs_header_generation(eb));
282 }
274 ret = 1; 283 ret = 1;
275 clear_extent_buffer_uptodate(io_tree, eb); 284 clear_extent_buffer_uptodate(io_tree, eb);
276out: 285out:
@@ -414,9 +423,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
414 423
415 found_start = btrfs_header_bytenr(eb); 424 found_start = btrfs_header_bytenr(eb);
416 if (found_start != start) { 425 if (found_start != start) {
417 printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", 426 if (printk_ratelimit()) {
418 (unsigned long long)found_start, 427 printk(KERN_INFO "btrfs bad tree block start "
419 (unsigned long long)eb->start); 428 "%llu %llu\n",
429 (unsigned long long)found_start,
430 (unsigned long long)eb->start);
431 }
420 ret = -EIO; 432 ret = -EIO;
421 goto err; 433 goto err;
422 } 434 }
@@ -428,8 +440,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
428 goto err; 440 goto err;
429 } 441 }
430 if (check_tree_block_fsid(root, eb)) { 442 if (check_tree_block_fsid(root, eb)) {
431 printk(KERN_INFO "btrfs bad fsid on block %llu\n", 443 if (printk_ratelimit()) {
432 (unsigned long long)eb->start); 444 printk(KERN_INFO "btrfs bad fsid on block %llu\n",
445 (unsigned long long)eb->start);
446 }
433 ret = -EIO; 447 ret = -EIO;
434 goto err; 448 goto err;
435 } 449 }
@@ -578,19 +592,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
578 async->bio_flags = bio_flags; 592 async->bio_flags = bio_flags;
579 593
580 atomic_inc(&fs_info->nr_async_submits); 594 atomic_inc(&fs_info->nr_async_submits);
595
596 if (rw & (1 << BIO_RW_SYNCIO))
597 btrfs_set_work_high_prio(&async->work);
598
581 btrfs_queue_worker(&fs_info->workers, &async->work); 599 btrfs_queue_worker(&fs_info->workers, &async->work);
582#if 0
583 int limit = btrfs_async_submit_limit(fs_info);
584 if (atomic_read(&fs_info->nr_async_submits) > limit) {
585 wait_event_timeout(fs_info->async_submit_wait,
586 (atomic_read(&fs_info->nr_async_submits) < limit),
587 HZ/10);
588 600
589 wait_event_timeout(fs_info->async_submit_wait,
590 (atomic_read(&fs_info->nr_async_bios) < limit),
591 HZ/10);
592 }
593#endif
594 while (atomic_read(&fs_info->async_submit_draining) && 601 while (atomic_read(&fs_info->async_submit_draining) &&
595 atomic_read(&fs_info->nr_async_submits)) { 602 atomic_read(&fs_info->nr_async_submits)) {
596 wait_event(fs_info->async_submit_wait, 603 wait_event(fs_info->async_submit_wait,
@@ -655,6 +662,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
655 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 662 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
656 mirror_num, 0); 663 mirror_num, 0);
657 } 664 }
665
658 /* 666 /*
659 * kthread helpers are used to submit writes so that checksumming 667 * kthread helpers are used to submit writes so that checksumming
660 * can happen in parallel across all CPUs 668 * can happen in parallel across all CPUs
@@ -668,14 +676,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
668static int btree_writepage(struct page *page, struct writeback_control *wbc) 676static int btree_writepage(struct page *page, struct writeback_control *wbc)
669{ 677{
670 struct extent_io_tree *tree; 678 struct extent_io_tree *tree;
679 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
680 struct extent_buffer *eb;
681 int was_dirty;
682
671 tree = &BTRFS_I(page->mapping->host)->io_tree; 683 tree = &BTRFS_I(page->mapping->host)->io_tree;
684 if (!(current->flags & PF_MEMALLOC)) {
685 return extent_write_full_page(tree, page,
686 btree_get_extent, wbc);
687 }
672 688
673 if (current->flags & PF_MEMALLOC) { 689 redirty_page_for_writepage(wbc, page);
674 redirty_page_for_writepage(wbc, page); 690 eb = btrfs_find_tree_block(root, page_offset(page),
675 unlock_page(page); 691 PAGE_CACHE_SIZE);
676 return 0; 692 WARN_ON(!eb);
693
694 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
695 if (!was_dirty) {
696 spin_lock(&root->fs_info->delalloc_lock);
697 root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
698 spin_unlock(&root->fs_info->delalloc_lock);
677 } 699 }
678 return extent_write_full_page(tree, page, btree_get_extent, wbc); 700 free_extent_buffer(eb);
701
702 unlock_page(page);
703 return 0;
679} 704}
680 705
681static int btree_writepages(struct address_space *mapping, 706static int btree_writepages(struct address_space *mapping,
@@ -684,15 +709,15 @@ static int btree_writepages(struct address_space *mapping,
684 struct extent_io_tree *tree; 709 struct extent_io_tree *tree;
685 tree = &BTRFS_I(mapping->host)->io_tree; 710 tree = &BTRFS_I(mapping->host)->io_tree;
686 if (wbc->sync_mode == WB_SYNC_NONE) { 711 if (wbc->sync_mode == WB_SYNC_NONE) {
712 struct btrfs_root *root = BTRFS_I(mapping->host)->root;
687 u64 num_dirty; 713 u64 num_dirty;
688 u64 start = 0;
689 unsigned long thresh = 32 * 1024 * 1024; 714 unsigned long thresh = 32 * 1024 * 1024;
690 715
691 if (wbc->for_kupdate) 716 if (wbc->for_kupdate)
692 return 0; 717 return 0;
693 718
694 num_dirty = count_range_bits(tree, &start, (u64)-1, 719 /* this is a bit racy, but that's ok */
695 thresh, EXTENT_DIRTY); 720 num_dirty = root->fs_info->dirty_metadata_bytes;
696 if (num_dirty < thresh) 721 if (num_dirty < thresh)
697 return 0; 722 return 0;
698 } 723 }
@@ -747,27 +772,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
747 } 772 }
748} 773}
749 774
750#if 0
751static int btree_writepage(struct page *page, struct writeback_control *wbc)
752{
753 struct buffer_head *bh;
754 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
755 struct buffer_head *head;
756 if (!page_has_buffers(page)) {
757 create_empty_buffers(page, root->fs_info->sb->s_blocksize,
758 (1 << BH_Dirty)|(1 << BH_Uptodate));
759 }
760 head = page_buffers(page);
761 bh = head;
762 do {
763 if (buffer_dirty(bh))
764 csum_tree_block(root, bh, 0);
765 bh = bh->b_this_page;
766 } while (bh != head);
767 return block_write_full_page(page, btree_get_block, wbc);
768}
769#endif
770
771static struct address_space_operations btree_aops = { 775static struct address_space_operations btree_aops = {
772 .readpage = btree_readpage, 776 .readpage = btree_readpage,
773 .writepage = btree_writepage, 777 .writepage = btree_writepage,
@@ -845,8 +849,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
845 849
846 if (ret == 0) 850 if (ret == 0)
847 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); 851 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
848 else
849 WARN_ON(1);
850 return buf; 852 return buf;
851 853
852} 854}
@@ -859,9 +861,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
859 root->fs_info->running_transaction->transid) { 861 root->fs_info->running_transaction->transid) {
860 btrfs_assert_tree_locked(buf); 862 btrfs_assert_tree_locked(buf);
861 863
862 /* ugh, clear_extent_buffer_dirty can be expensive */ 864 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
863 btrfs_set_lock_blocking(buf); 865 spin_lock(&root->fs_info->delalloc_lock);
866 if (root->fs_info->dirty_metadata_bytes >= buf->len)
867 root->fs_info->dirty_metadata_bytes -= buf->len;
868 else
869 WARN_ON(1);
870 spin_unlock(&root->fs_info->delalloc_lock);
871 }
864 872
873 /* ugh, clear_extent_buffer_dirty needs to lock the page */
874 btrfs_set_lock_blocking(buf);
865 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 875 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
866 buf); 876 buf);
867 } 877 }
@@ -875,7 +885,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
875{ 885{
876 root->node = NULL; 886 root->node = NULL;
877 root->commit_root = NULL; 887 root->commit_root = NULL;
878 root->ref_tree = NULL;
879 root->sectorsize = sectorsize; 888 root->sectorsize = sectorsize;
880 root->nodesize = nodesize; 889 root->nodesize = nodesize;
881 root->leafsize = leafsize; 890 root->leafsize = leafsize;
@@ -890,12 +899,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
890 root->last_inode_alloc = 0; 899 root->last_inode_alloc = 0;
891 root->name = NULL; 900 root->name = NULL;
892 root->in_sysfs = 0; 901 root->in_sysfs = 0;
902 root->inode_tree.rb_node = NULL;
893 903
894 INIT_LIST_HEAD(&root->dirty_list); 904 INIT_LIST_HEAD(&root->dirty_list);
895 INIT_LIST_HEAD(&root->orphan_list); 905 INIT_LIST_HEAD(&root->orphan_list);
896 INIT_LIST_HEAD(&root->dead_list); 906 INIT_LIST_HEAD(&root->root_list);
897 spin_lock_init(&root->node_lock); 907 spin_lock_init(&root->node_lock);
898 spin_lock_init(&root->list_lock); 908 spin_lock_init(&root->list_lock);
909 spin_lock_init(&root->inode_lock);
899 mutex_init(&root->objectid_mutex); 910 mutex_init(&root->objectid_mutex);
900 mutex_init(&root->log_mutex); 911 mutex_init(&root->log_mutex);
901 init_waitqueue_head(&root->log_writer_wait); 912 init_waitqueue_head(&root->log_writer_wait);
@@ -909,9 +920,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
909 extent_io_tree_init(&root->dirty_log_pages, 920 extent_io_tree_init(&root->dirty_log_pages,
910 fs_info->btree_inode->i_mapping, GFP_NOFS); 921 fs_info->btree_inode->i_mapping, GFP_NOFS);
911 922
912 btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
913 root->ref_tree = &root->ref_tree_struct;
914
915 memset(&root->root_key, 0, sizeof(root->root_key)); 923 memset(&root->root_key, 0, sizeof(root->root_key));
916 memset(&root->root_item, 0, sizeof(root->root_item)); 924 memset(&root->root_item, 0, sizeof(root->root_item));
917 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 925 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -950,6 +958,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
950 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 958 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
951 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 959 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
952 blocksize, generation); 960 blocksize, generation);
961 root->commit_root = btrfs_root_node(root);
953 BUG_ON(!root->node); 962 BUG_ON(!root->node);
954 return 0; 963 return 0;
955} 964}
@@ -1016,20 +1025,19 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1016 */ 1025 */
1017 root->ref_cows = 0; 1026 root->ref_cows = 0;
1018 1027
1019 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 1028 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
1020 0, BTRFS_TREE_LOG_OBJECTID, 1029 BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
1021 trans->transid, 0, 0, 0);
1022 if (IS_ERR(leaf)) { 1030 if (IS_ERR(leaf)) {
1023 kfree(root); 1031 kfree(root);
1024 return ERR_CAST(leaf); 1032 return ERR_CAST(leaf);
1025 } 1033 }
1026 1034
1035 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
1036 btrfs_set_header_bytenr(leaf, leaf->start);
1037 btrfs_set_header_generation(leaf, trans->transid);
1038 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
1039 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
1027 root->node = leaf; 1040 root->node = leaf;
1028 btrfs_set_header_nritems(root->node, 0);
1029 btrfs_set_header_level(root->node, 0);
1030 btrfs_set_header_bytenr(root->node, root->node->start);
1031 btrfs_set_header_generation(root->node, trans->transid);
1032 btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
1033 1041
1034 write_extent_buffer(root->node, root->fs_info->fsid, 1042 write_extent_buffer(root->node, root->fs_info->fsid,
1035 (unsigned long)btrfs_header_fsid(root->node), 1043 (unsigned long)btrfs_header_fsid(root->node),
@@ -1072,8 +1080,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1072 inode_item->nbytes = cpu_to_le64(root->leafsize); 1080 inode_item->nbytes = cpu_to_le64(root->leafsize);
1073 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 1081 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
1074 1082
1075 btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); 1083 btrfs_set_root_node(&log_root->root_item, log_root->node);
1076 btrfs_set_root_generation(&log_root->root_item, trans->transid);
1077 1084
1078 WARN_ON(root->log_root); 1085 WARN_ON(root->log_root);
1079 root->log_root = log_root; 1086 root->log_root = log_root;
@@ -1135,6 +1142,7 @@ out:
1135 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1142 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1136 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1143 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1137 blocksize, generation); 1144 blocksize, generation);
1145 root->commit_root = btrfs_root_node(root);
1138 BUG_ON(!root->node); 1146 BUG_ON(!root->node);
1139insert: 1147insert:
1140 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1148 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
@@ -1201,7 +1209,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1201 } 1209 }
1202 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 1210 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
1203 ret = btrfs_find_dead_roots(fs_info->tree_root, 1211 ret = btrfs_find_dead_roots(fs_info->tree_root,
1204 root->root_key.objectid, root); 1212 root->root_key.objectid);
1205 BUG_ON(ret); 1213 BUG_ON(ret);
1206 btrfs_orphan_cleanup(root); 1214 btrfs_orphan_cleanup(root);
1207 } 1215 }
@@ -1247,11 +1255,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1247 int ret = 0; 1255 int ret = 0;
1248 struct btrfs_device *device; 1256 struct btrfs_device *device;
1249 struct backing_dev_info *bdi; 1257 struct backing_dev_info *bdi;
1250#if 0 1258
1251 if ((bdi_bits & (1 << BDI_write_congested)) &&
1252 btrfs_congested_async(info, 0))
1253 return 1;
1254#endif
1255 list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1259 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1256 if (!device->bdev) 1260 if (!device->bdev)
1257 continue; 1261 continue;
@@ -1340,12 +1344,25 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1340 free_extent_map(em); 1344 free_extent_map(em);
1341} 1345}
1342 1346
1347/*
1348 * If this fails, caller must call bdi_destroy() to get rid of the
1349 * bdi again.
1350 */
1343static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) 1351static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1344{ 1352{
1345 bdi_init(bdi); 1353 int err;
1354
1355 bdi->capabilities = BDI_CAP_MAP_COPY;
1356 err = bdi_init(bdi);
1357 if (err)
1358 return err;
1359
1360 err = bdi_register(bdi, NULL, "btrfs-%d",
1361 atomic_inc_return(&btrfs_bdi_num));
1362 if (err)
1363 return err;
1364
1346 bdi->ra_pages = default_backing_dev_info.ra_pages; 1365 bdi->ra_pages = default_backing_dev_info.ra_pages;
1347 bdi->state = 0;
1348 bdi->capabilities = default_backing_dev_info.capabilities;
1349 bdi->unplug_io_fn = btrfs_unplug_io_fn; 1366 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1350 bdi->unplug_io_data = info; 1367 bdi->unplug_io_data = info;
1351 bdi->congested_fn = btrfs_congested_fn; 1368 bdi->congested_fn = btrfs_congested_fn;
@@ -1387,8 +1404,6 @@ static int bio_ready_for_csum(struct bio *bio)
1387 1404
1388 ret = extent_range_uptodate(io_tree, start + length, 1405 ret = extent_range_uptodate(io_tree, start + length,
1389 start + buf_len - 1); 1406 start + buf_len - 1);
1390 if (ret == 1)
1391 return ret;
1392 return ret; 1407 return ret;
1393} 1408}
1394 1409
@@ -1471,12 +1486,6 @@ static int transaction_kthread(void *arg)
1471 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1486 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1472 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1487 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1473 1488
1474 if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
1475 printk(KERN_INFO "btrfs: total reference cache "
1476 "size %llu\n",
1477 root->fs_info->total_ref_cache_size);
1478 }
1479
1480 mutex_lock(&root->fs_info->trans_mutex); 1489 mutex_lock(&root->fs_info->trans_mutex);
1481 cur = root->fs_info->running_transaction; 1490 cur = root->fs_info->running_transaction;
1482 if (!cur) { 1491 if (!cur) {
@@ -1493,6 +1502,7 @@ static int transaction_kthread(void *arg)
1493 mutex_unlock(&root->fs_info->trans_mutex); 1502 mutex_unlock(&root->fs_info->trans_mutex);
1494 trans = btrfs_start_transaction(root, 1); 1503 trans = btrfs_start_transaction(root, 1);
1495 ret = btrfs_commit_transaction(trans, root); 1504 ret = btrfs_commit_transaction(trans, root);
1505
1496sleep: 1506sleep:
1497 wake_up_process(root->fs_info->cleaner_kthread); 1507 wake_up_process(root->fs_info->cleaner_kthread);
1498 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 1508 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -1552,6 +1562,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1552 INIT_LIST_HEAD(&fs_info->dead_roots); 1562 INIT_LIST_HEAD(&fs_info->dead_roots);
1553 INIT_LIST_HEAD(&fs_info->hashers); 1563 INIT_LIST_HEAD(&fs_info->hashers);
1554 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1564 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1565 INIT_LIST_HEAD(&fs_info->ordered_operations);
1555 spin_lock_init(&fs_info->delalloc_lock); 1566 spin_lock_init(&fs_info->delalloc_lock);
1556 spin_lock_init(&fs_info->new_trans_lock); 1567 spin_lock_init(&fs_info->new_trans_lock);
1557 spin_lock_init(&fs_info->ref_cache_lock); 1568 spin_lock_init(&fs_info->ref_cache_lock);
@@ -1570,15 +1581,15 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1570 atomic_set(&fs_info->async_delalloc_pages, 0); 1581 atomic_set(&fs_info->async_delalloc_pages, 0);
1571 atomic_set(&fs_info->async_submit_draining, 0); 1582 atomic_set(&fs_info->async_submit_draining, 0);
1572 atomic_set(&fs_info->nr_async_bios, 0); 1583 atomic_set(&fs_info->nr_async_bios, 0);
1573 atomic_set(&fs_info->throttles, 0);
1574 atomic_set(&fs_info->throttle_gen, 0);
1575 fs_info->sb = sb; 1584 fs_info->sb = sb;
1576 fs_info->max_extent = (u64)-1; 1585 fs_info->max_extent = (u64)-1;
1577 fs_info->max_inline = 8192 * 1024; 1586 fs_info->max_inline = 8192 * 1024;
1578 setup_bdi(fs_info, &fs_info->bdi); 1587 if (setup_bdi(fs_info, &fs_info->bdi))
1588 goto fail_bdi;
1579 fs_info->btree_inode = new_inode(sb); 1589 fs_info->btree_inode = new_inode(sb);
1580 fs_info->btree_inode->i_ino = 1; 1590 fs_info->btree_inode->i_ino = 1;
1581 fs_info->btree_inode->i_nlink = 1; 1591 fs_info->btree_inode->i_nlink = 1;
1592 fs_info->metadata_ratio = 8;
1582 1593
1583 fs_info->thread_pool_size = min_t(unsigned long, 1594 fs_info->thread_pool_size = min_t(unsigned long,
1584 num_online_cpus() + 2, 8); 1595 num_online_cpus() + 2, 8);
@@ -1598,6 +1609,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1598 fs_info->btree_inode->i_mapping->a_ops = &btree_aops; 1609 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
1599 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; 1610 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
1600 1611
1612 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
1601 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1613 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
1602 fs_info->btree_inode->i_mapping, 1614 fs_info->btree_inode->i_mapping,
1603 GFP_NOFS); 1615 GFP_NOFS);
@@ -1611,31 +1623,27 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1611 1623
1612 extent_io_tree_init(&fs_info->pinned_extents, 1624 extent_io_tree_init(&fs_info->pinned_extents,
1613 fs_info->btree_inode->i_mapping, GFP_NOFS); 1625 fs_info->btree_inode->i_mapping, GFP_NOFS);
1614 extent_io_tree_init(&fs_info->pending_del,
1615 fs_info->btree_inode->i_mapping, GFP_NOFS);
1616 extent_io_tree_init(&fs_info->extent_ins,
1617 fs_info->btree_inode->i_mapping, GFP_NOFS);
1618 fs_info->do_barriers = 1; 1626 fs_info->do_barriers = 1;
1619 1627
1620 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
1621 btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
1622 btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
1623
1624 BTRFS_I(fs_info->btree_inode)->root = tree_root; 1628 BTRFS_I(fs_info->btree_inode)->root = tree_root;
1625 memset(&BTRFS_I(fs_info->btree_inode)->location, 0, 1629 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
1626 sizeof(struct btrfs_key)); 1630 sizeof(struct btrfs_key));
1627 insert_inode_hash(fs_info->btree_inode); 1631 insert_inode_hash(fs_info->btree_inode);
1628 1632
1629 mutex_init(&fs_info->trans_mutex); 1633 mutex_init(&fs_info->trans_mutex);
1634 mutex_init(&fs_info->ordered_operations_mutex);
1630 mutex_init(&fs_info->tree_log_mutex); 1635 mutex_init(&fs_info->tree_log_mutex);
1631 mutex_init(&fs_info->drop_mutex); 1636 mutex_init(&fs_info->drop_mutex);
1632 mutex_init(&fs_info->extent_ins_mutex);
1633 mutex_init(&fs_info->pinned_mutex);
1634 mutex_init(&fs_info->chunk_mutex); 1637 mutex_init(&fs_info->chunk_mutex);
1635 mutex_init(&fs_info->transaction_kthread_mutex); 1638 mutex_init(&fs_info->transaction_kthread_mutex);
1636 mutex_init(&fs_info->cleaner_mutex); 1639 mutex_init(&fs_info->cleaner_mutex);
1637 mutex_init(&fs_info->volume_mutex); 1640 mutex_init(&fs_info->volume_mutex);
1638 mutex_init(&fs_info->tree_reloc_mutex); 1641 mutex_init(&fs_info->tree_reloc_mutex);
1642 init_rwsem(&fs_info->extent_commit_sem);
1643
1644 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
1645 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
1646
1639 init_waitqueue_head(&fs_info->transaction_throttle); 1647 init_waitqueue_head(&fs_info->transaction_throttle);
1640 init_waitqueue_head(&fs_info->transaction_wait); 1648 init_waitqueue_head(&fs_info->transaction_wait);
1641 init_waitqueue_head(&fs_info->async_submit_wait); 1649 init_waitqueue_head(&fs_info->async_submit_wait);
@@ -1670,17 +1678,23 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1670 if (features) { 1678 if (features) {
1671 printk(KERN_ERR "BTRFS: couldn't mount because of " 1679 printk(KERN_ERR "BTRFS: couldn't mount because of "
1672 "unsupported optional features (%Lx).\n", 1680 "unsupported optional features (%Lx).\n",
1673 features); 1681 (unsigned long long)features);
1674 err = -EINVAL; 1682 err = -EINVAL;
1675 goto fail_iput; 1683 goto fail_iput;
1676 } 1684 }
1677 1685
1686 features = btrfs_super_incompat_flags(disk_super);
1687 if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
1688 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
1689 btrfs_set_super_incompat_flags(disk_super, features);
1690 }
1691
1678 features = btrfs_super_compat_ro_flags(disk_super) & 1692 features = btrfs_super_compat_ro_flags(disk_super) &
1679 ~BTRFS_FEATURE_COMPAT_RO_SUPP; 1693 ~BTRFS_FEATURE_COMPAT_RO_SUPP;
1680 if (!(sb->s_flags & MS_RDONLY) && features) { 1694 if (!(sb->s_flags & MS_RDONLY) && features) {
1681 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " 1695 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
1682 "unsupported option features (%Lx).\n", 1696 "unsupported option features (%Lx).\n",
1683 features); 1697 (unsigned long long)features);
1684 err = -EINVAL; 1698 err = -EINVAL;
1685 goto fail_iput; 1699 goto fail_iput;
1686 } 1700 }
@@ -1772,7 +1786,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1772 if (ret) { 1786 if (ret) {
1773 printk(KERN_WARNING "btrfs: failed to read the system " 1787 printk(KERN_WARNING "btrfs: failed to read the system "
1774 "array on %s\n", sb->s_id); 1788 "array on %s\n", sb->s_id);
1775 goto fail_sys_array; 1789 goto fail_sb_buffer;
1776 } 1790 }
1777 1791
1778 blocksize = btrfs_level_size(tree_root, 1792 blocksize = btrfs_level_size(tree_root,
@@ -1786,6 +1800,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1786 btrfs_super_chunk_root(disk_super), 1800 btrfs_super_chunk_root(disk_super),
1787 blocksize, generation); 1801 blocksize, generation);
1788 BUG_ON(!chunk_root->node); 1802 BUG_ON(!chunk_root->node);
1803 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
1804 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
1805 sb->s_id);
1806 goto fail_chunk_root;
1807 }
1808 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
1809 chunk_root->commit_root = btrfs_root_node(chunk_root);
1789 1810
1790 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, 1811 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
1791 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), 1812 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
@@ -1811,7 +1832,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1811 blocksize, generation); 1832 blocksize, generation);
1812 if (!tree_root->node) 1833 if (!tree_root->node)
1813 goto fail_chunk_root; 1834 goto fail_chunk_root;
1814 1835 if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
1836 printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
1837 sb->s_id);
1838 goto fail_tree_root;
1839 }
1840 btrfs_set_root_node(&tree_root->root_item, tree_root->node);
1841 tree_root->commit_root = btrfs_root_node(tree_root);
1815 1842
1816 ret = find_and_setup_root(tree_root, fs_info, 1843 ret = find_and_setup_root(tree_root, fs_info,
1817 BTRFS_EXTENT_TREE_OBJECTID, extent_root); 1844 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
@@ -1821,14 +1848,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1821 1848
1822 ret = find_and_setup_root(tree_root, fs_info, 1849 ret = find_and_setup_root(tree_root, fs_info,
1823 BTRFS_DEV_TREE_OBJECTID, dev_root); 1850 BTRFS_DEV_TREE_OBJECTID, dev_root);
1824 dev_root->track_dirty = 1;
1825 if (ret) 1851 if (ret)
1826 goto fail_extent_root; 1852 goto fail_extent_root;
1853 dev_root->track_dirty = 1;
1827 1854
1828 ret = find_and_setup_root(tree_root, fs_info, 1855 ret = find_and_setup_root(tree_root, fs_info,
1829 BTRFS_CSUM_TREE_OBJECTID, csum_root); 1856 BTRFS_CSUM_TREE_OBJECTID, csum_root);
1830 if (ret) 1857 if (ret)
1831 goto fail_extent_root; 1858 goto fail_dev_root;
1832 1859
1833 csum_root->track_dirty = 1; 1860 csum_root->track_dirty = 1;
1834 1861
@@ -1850,6 +1877,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1850 if (IS_ERR(fs_info->transaction_kthread)) 1877 if (IS_ERR(fs_info->transaction_kthread))
1851 goto fail_cleaner; 1878 goto fail_cleaner;
1852 1879
1880 if (!btrfs_test_opt(tree_root, SSD) &&
1881 !btrfs_test_opt(tree_root, NOSSD) &&
1882 !fs_info->fs_devices->rotating) {
1883 printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
1884 "mode\n");
1885 btrfs_set_opt(fs_info->mount_opt, SSD);
1886 }
1887
1853 if (btrfs_super_log_root(disk_super) != 0) { 1888 if (btrfs_super_log_root(disk_super) != 0) {
1854 u64 bytenr = btrfs_super_log_root(disk_super); 1889 u64 bytenr = btrfs_super_log_root(disk_super);
1855 1890
@@ -1882,7 +1917,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1882 } 1917 }
1883 1918
1884 if (!(sb->s_flags & MS_RDONLY)) { 1919 if (!(sb->s_flags & MS_RDONLY)) {
1885 ret = btrfs_cleanup_reloc_trees(tree_root); 1920 ret = btrfs_recover_relocation(tree_root);
1886 BUG_ON(ret); 1921 BUG_ON(ret);
1887 } 1922 }
1888 1923
@@ -1893,6 +1928,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1893 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); 1928 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
1894 if (!fs_info->fs_root) 1929 if (!fs_info->fs_root)
1895 goto fail_trans_kthread; 1930 goto fail_trans_kthread;
1931
1896 return tree_root; 1932 return tree_root;
1897 1933
1898fail_trans_kthread: 1934fail_trans_kthread:
@@ -1909,14 +1945,19 @@ fail_cleaner:
1909 1945
1910fail_csum_root: 1946fail_csum_root:
1911 free_extent_buffer(csum_root->node); 1947 free_extent_buffer(csum_root->node);
1948 free_extent_buffer(csum_root->commit_root);
1949fail_dev_root:
1950 free_extent_buffer(dev_root->node);
1951 free_extent_buffer(dev_root->commit_root);
1912fail_extent_root: 1952fail_extent_root:
1913 free_extent_buffer(extent_root->node); 1953 free_extent_buffer(extent_root->node);
1954 free_extent_buffer(extent_root->commit_root);
1914fail_tree_root: 1955fail_tree_root:
1915 free_extent_buffer(tree_root->node); 1956 free_extent_buffer(tree_root->node);
1957 free_extent_buffer(tree_root->commit_root);
1916fail_chunk_root: 1958fail_chunk_root:
1917 free_extent_buffer(chunk_root->node); 1959 free_extent_buffer(chunk_root->node);
1918fail_sys_array: 1960 free_extent_buffer(chunk_root->commit_root);
1919 free_extent_buffer(dev_root->node);
1920fail_sb_buffer: 1961fail_sb_buffer:
1921 btrfs_stop_workers(&fs_info->fixup_workers); 1962 btrfs_stop_workers(&fs_info->fixup_workers);
1922 btrfs_stop_workers(&fs_info->delalloc_workers); 1963 btrfs_stop_workers(&fs_info->delalloc_workers);
@@ -1932,8 +1973,8 @@ fail_iput:
1932 1973
1933 btrfs_close_devices(fs_info->fs_devices); 1974 btrfs_close_devices(fs_info->fs_devices);
1934 btrfs_mapping_tree_free(&fs_info->mapping_tree); 1975 btrfs_mapping_tree_free(&fs_info->mapping_tree);
1976fail_bdi:
1935 bdi_destroy(&fs_info->bdi); 1977 bdi_destroy(&fs_info->bdi);
1936
1937fail: 1978fail:
1938 kfree(extent_root); 1979 kfree(extent_root);
1939 kfree(tree_root); 1980 kfree(tree_root);
@@ -2006,6 +2047,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
2006 return latest; 2047 return latest;
2007} 2048}
2008 2049
2050/*
2051 * this should be called twice, once with wait == 0 and
2052 * once with wait == 1. When wait == 0 is done, all the buffer heads
2053 * we write are pinned.
2054 *
2055 * They are released when wait == 1 is done.
2056 * max_mirrors must be the same for both runs, and it indicates how
2057 * many supers on this one device should be written.
2058 *
2059 * max_mirrors == 0 means to write them all.
2060 */
2009static int write_dev_supers(struct btrfs_device *device, 2061static int write_dev_supers(struct btrfs_device *device,
2010 struct btrfs_super_block *sb, 2062 struct btrfs_super_block *sb,
2011 int do_barriers, int wait, int max_mirrors) 2063 int do_barriers, int wait, int max_mirrors)
@@ -2041,12 +2093,16 @@ static int write_dev_supers(struct btrfs_device *device,
2041 bh = __find_get_block(device->bdev, bytenr / 4096, 2093 bh = __find_get_block(device->bdev, bytenr / 4096,
2042 BTRFS_SUPER_INFO_SIZE); 2094 BTRFS_SUPER_INFO_SIZE);
2043 BUG_ON(!bh); 2095 BUG_ON(!bh);
2044 brelse(bh);
2045 wait_on_buffer(bh); 2096 wait_on_buffer(bh);
2046 if (buffer_uptodate(bh)) { 2097 if (!buffer_uptodate(bh))
2047 brelse(bh); 2098 errors++;
2048 continue; 2099
2049 } 2100 /* drop our reference */
2101 brelse(bh);
2102
2103 /* drop the reference from the wait == 0 run */
2104 brelse(bh);
2105 continue;
2050 } else { 2106 } else {
2051 btrfs_set_super_bytenr(sb, bytenr); 2107 btrfs_set_super_bytenr(sb, bytenr);
2052 2108
@@ -2057,12 +2113,18 @@ static int write_dev_supers(struct btrfs_device *device,
2057 BTRFS_CSUM_SIZE); 2113 BTRFS_CSUM_SIZE);
2058 btrfs_csum_final(crc, sb->csum); 2114 btrfs_csum_final(crc, sb->csum);
2059 2115
2116 /*
2117 * one reference for us, and we leave it for the
2118 * caller
2119 */
2060 bh = __getblk(device->bdev, bytenr / 4096, 2120 bh = __getblk(device->bdev, bytenr / 4096,
2061 BTRFS_SUPER_INFO_SIZE); 2121 BTRFS_SUPER_INFO_SIZE);
2062 memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); 2122 memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
2063 2123
2064 set_buffer_uptodate(bh); 2124 /* one reference for submit_bh */
2065 get_bh(bh); 2125 get_bh(bh);
2126
2127 set_buffer_uptodate(bh);
2066 lock_buffer(bh); 2128 lock_buffer(bh);
2067 bh->b_end_io = btrfs_end_buffer_write_sync; 2129 bh->b_end_io = btrfs_end_buffer_write_sync;
2068 } 2130 }
@@ -2074,30 +2136,24 @@ static int write_dev_supers(struct btrfs_device *device,
2074 device->name); 2136 device->name);
2075 set_buffer_uptodate(bh); 2137 set_buffer_uptodate(bh);
2076 device->barriers = 0; 2138 device->barriers = 0;
2139 /* one reference for submit_bh */
2077 get_bh(bh); 2140 get_bh(bh);
2078 lock_buffer(bh); 2141 lock_buffer(bh);
2079 ret = submit_bh(WRITE, bh); 2142 ret = submit_bh(WRITE_SYNC, bh);
2080 } 2143 }
2081 } else { 2144 } else {
2082 ret = submit_bh(WRITE, bh); 2145 ret = submit_bh(WRITE_SYNC, bh);
2083 } 2146 }
2084 2147
2085 if (!ret && wait) { 2148 if (ret)
2086 wait_on_buffer(bh);
2087 if (!buffer_uptodate(bh))
2088 errors++;
2089 } else if (ret) {
2090 errors++; 2149 errors++;
2091 }
2092 if (wait)
2093 brelse(bh);
2094 } 2150 }
2095 return errors < i ? 0 : -1; 2151 return errors < i ? 0 : -1;
2096} 2152}
2097 2153
2098int write_all_supers(struct btrfs_root *root, int max_mirrors) 2154int write_all_supers(struct btrfs_root *root, int max_mirrors)
2099{ 2155{
2100 struct list_head *head = &root->fs_info->fs_devices->devices; 2156 struct list_head *head;
2101 struct btrfs_device *dev; 2157 struct btrfs_device *dev;
2102 struct btrfs_super_block *sb; 2158 struct btrfs_super_block *sb;
2103 struct btrfs_dev_item *dev_item; 2159 struct btrfs_dev_item *dev_item;
@@ -2112,6 +2168,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2112 2168
2113 sb = &root->fs_info->super_for_commit; 2169 sb = &root->fs_info->super_for_commit;
2114 dev_item = &sb->dev_item; 2170 dev_item = &sb->dev_item;
2171
2172 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2173 head = &root->fs_info->fs_devices->devices;
2115 list_for_each_entry(dev, head, dev_list) { 2174 list_for_each_entry(dev, head, dev_list) {
2116 if (!dev->bdev) { 2175 if (!dev->bdev) {
2117 total_errors++; 2176 total_errors++;
@@ -2155,6 +2214,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2155 if (ret) 2214 if (ret)
2156 total_errors++; 2215 total_errors++;
2157 } 2216 }
2217 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2158 if (total_errors > max_errors) { 2218 if (total_errors > max_errors) {
2159 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 2219 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
2160 total_errors); 2220 total_errors);
@@ -2174,6 +2234,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
2174 2234
2175int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2235int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2176{ 2236{
2237 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2177 radix_tree_delete(&fs_info->fs_roots_radix, 2238 radix_tree_delete(&fs_info->fs_roots_radix,
2178 (unsigned long)root->root_key.objectid); 2239 (unsigned long)root->root_key.objectid);
2179 if (root->anon_super.s_dev) { 2240 if (root->anon_super.s_dev) {
@@ -2220,10 +2281,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2220 ARRAY_SIZE(gang)); 2281 ARRAY_SIZE(gang));
2221 if (!ret) 2282 if (!ret)
2222 break; 2283 break;
2284
2285 root_objectid = gang[ret - 1]->root_key.objectid + 1;
2223 for (i = 0; i < ret; i++) { 2286 for (i = 0; i < ret; i++) {
2224 root_objectid = gang[i]->root_key.objectid; 2287 root_objectid = gang[i]->root_key.objectid;
2225 ret = btrfs_find_dead_roots(fs_info->tree_root, 2288 ret = btrfs_find_dead_roots(fs_info->tree_root,
2226 root_objectid, gang[i]); 2289 root_objectid);
2227 BUG_ON(ret); 2290 BUG_ON(ret);
2228 btrfs_orphan_cleanup(gang[i]); 2291 btrfs_orphan_cleanup(gang[i]);
2229 } 2292 }
@@ -2270,31 +2333,31 @@ int close_ctree(struct btrfs_root *root)
2270 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2333 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2271 } 2334 }
2272 2335
2336 fs_info->closing = 2;
2337 smp_mb();
2338
2273 if (fs_info->delalloc_bytes) { 2339 if (fs_info->delalloc_bytes) {
2274 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 2340 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
2275 fs_info->delalloc_bytes); 2341 (unsigned long long)fs_info->delalloc_bytes);
2276 } 2342 }
2277 if (fs_info->total_ref_cache_size) { 2343 if (fs_info->total_ref_cache_size) {
2278 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", 2344 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
2279 (unsigned long long)fs_info->total_ref_cache_size); 2345 (unsigned long long)fs_info->total_ref_cache_size);
2280 } 2346 }
2281 2347
2282 if (fs_info->extent_root->node) 2348 free_extent_buffer(fs_info->extent_root->node);
2283 free_extent_buffer(fs_info->extent_root->node); 2349 free_extent_buffer(fs_info->extent_root->commit_root);
2284 2350 free_extent_buffer(fs_info->tree_root->node);
2285 if (fs_info->tree_root->node) 2351 free_extent_buffer(fs_info->tree_root->commit_root);
2286 free_extent_buffer(fs_info->tree_root->node); 2352 free_extent_buffer(root->fs_info->chunk_root->node);
2287 2353 free_extent_buffer(root->fs_info->chunk_root->commit_root);
2288 if (root->fs_info->chunk_root->node) 2354 free_extent_buffer(root->fs_info->dev_root->node);
2289 free_extent_buffer(root->fs_info->chunk_root->node); 2355 free_extent_buffer(root->fs_info->dev_root->commit_root);
2290 2356 free_extent_buffer(root->fs_info->csum_root->node);
2291 if (root->fs_info->dev_root->node) 2357 free_extent_buffer(root->fs_info->csum_root->commit_root);
2292 free_extent_buffer(root->fs_info->dev_root->node);
2293
2294 if (root->fs_info->csum_root->node)
2295 free_extent_buffer(root->fs_info->csum_root->node);
2296 2358
2297 btrfs_free_block_groups(root->fs_info); 2359 btrfs_free_block_groups(root->fs_info);
2360 btrfs_free_pinned_extents(root->fs_info);
2298 2361
2299 del_fs_roots(fs_info); 2362 del_fs_roots(fs_info);
2300 2363
@@ -2309,16 +2372,6 @@ int close_ctree(struct btrfs_root *root)
2309 btrfs_stop_workers(&fs_info->endio_write_workers); 2372 btrfs_stop_workers(&fs_info->endio_write_workers);
2310 btrfs_stop_workers(&fs_info->submit_workers); 2373 btrfs_stop_workers(&fs_info->submit_workers);
2311 2374
2312#if 0
2313 while (!list_empty(&fs_info->hashers)) {
2314 struct btrfs_hasher *hasher;
2315 hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
2316 hashers);
2317 list_del(&hasher->hashers);
2318 crypto_free_hash(&fs_info->hash_tfm);
2319 kfree(hasher);
2320 }
2321#endif
2322 btrfs_close_devices(fs_info->fs_devices); 2375 btrfs_close_devices(fs_info->fs_devices);
2323 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2376 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2324 2377
@@ -2358,8 +2411,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
2358 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 2411 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
2359 u64 transid = btrfs_header_generation(buf); 2412 u64 transid = btrfs_header_generation(buf);
2360 struct inode *btree_inode = root->fs_info->btree_inode; 2413 struct inode *btree_inode = root->fs_info->btree_inode;
2361 2414 int was_dirty;
2362 btrfs_set_lock_blocking(buf);
2363 2415
2364 btrfs_assert_tree_locked(buf); 2416 btrfs_assert_tree_locked(buf);
2365 if (transid != root->fs_info->generation) { 2417 if (transid != root->fs_info->generation) {
@@ -2370,7 +2422,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
2370 (unsigned long long)root->fs_info->generation); 2422 (unsigned long long)root->fs_info->generation);
2371 WARN_ON(1); 2423 WARN_ON(1);
2372 } 2424 }
2373 set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); 2425 was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
2426 buf);
2427 if (!was_dirty) {
2428 spin_lock(&root->fs_info->delalloc_lock);
2429 root->fs_info->dirty_metadata_bytes += buf->len;
2430 spin_unlock(&root->fs_info->delalloc_lock);
2431 }
2374} 2432}
2375 2433
2376void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) 2434void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
@@ -2379,17 +2437,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2379 * looks as though older kernels can get into trouble with 2437 * looks as though older kernels can get into trouble with
2380 * this code, they end up stuck in balance_dirty_pages forever 2438 * this code, they end up stuck in balance_dirty_pages forever
2381 */ 2439 */
2382 struct extent_io_tree *tree;
2383 u64 num_dirty; 2440 u64 num_dirty;
2384 u64 start = 0;
2385 unsigned long thresh = 32 * 1024 * 1024; 2441 unsigned long thresh = 32 * 1024 * 1024;
2386 tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
2387 2442
2388 if (current_is_pdflush() || current->flags & PF_MEMALLOC) 2443 if (current->flags & PF_MEMALLOC)
2389 return; 2444 return;
2390 2445
2391 num_dirty = count_range_bits(tree, &start, (u64)-1, 2446 num_dirty = root->fs_info->dirty_metadata_bytes;
2392 thresh, EXTENT_DIRTY); 2447
2393 if (num_dirty > thresh) { 2448 if (num_dirty > thresh) {
2394 balance_dirty_pages_ratelimited_nr( 2449 balance_dirty_pages_ratelimited_nr(
2395 root->fs_info->btree_inode->i_mapping, 1); 2450 root->fs_info->btree_inode->i_mapping, 1);
@@ -2410,6 +2465,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
2410int btree_lock_page_hook(struct page *page) 2465int btree_lock_page_hook(struct page *page)
2411{ 2466{
2412 struct inode *inode = page->mapping->host; 2467 struct inode *inode = page->mapping->host;
2468 struct btrfs_root *root = BTRFS_I(inode)->root;
2413 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2469 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2414 struct extent_buffer *eb; 2470 struct extent_buffer *eb;
2415 unsigned long len; 2471 unsigned long len;
@@ -2425,6 +2481,16 @@ int btree_lock_page_hook(struct page *page)
2425 2481
2426 btrfs_tree_lock(eb); 2482 btrfs_tree_lock(eb);
2427 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); 2483 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
2484
2485 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
2486 spin_lock(&root->fs_info->delalloc_lock);
2487 if (root->fs_info->dirty_metadata_bytes >= eb->len)
2488 root->fs_info->dirty_metadata_bytes -= eb->len;
2489 else
2490 WARN_ON(1);
2491 spin_unlock(&root->fs_info->delalloc_lock);
2492 }
2493
2428 btrfs_tree_unlock(eb); 2494 btrfs_tree_unlock(eb);
2429 free_extent_buffer(eb); 2495 free_extent_buffer(eb);
2430out: 2496out: